Biomarkers with absolute values:
#We need a FC matrix that contains only the data from Vorinostat-treated celllines:
UntreatedVorinostatcolumns <- grep(pattern = "vorinostat",colnames(Untreated))
TreatedVorinostatcolumns <- grep(pattern = "vorinostat",colnames(Treated))
UntreatedVorinostat <- Untreated[,UntreatedVorinostatcolumns]
TreatedVorinostat <- Treated[,TreatedVorinostatcolumns]
FCVorinostat <- TreatedVorinostat - UntreatedVorinostat
# work with mean of the rows because we only want to compare the genes:
FCVorinostatabs= abs(FCVorinostat)
FCVorinostatmean <- apply(FCVorinostatabs, 1, mean)
# sort the values to get the 100 largest values:
sortedgeneralbiomarker <- sort(FCVorinostatmean, decreasing = TRUE)
sortedgeneralbiomarker <- as.matrix(sortedgeneralbiomarker)
#select the top 100 general biomarkers:
top100generalbiomarkers = sortedgeneralbiomarker[1:100,]
top100generalbiomarkers <- as.matrix(top100generalbiomarkers)
#create vector with gene names:
generalbiomarkergenes = row.names(top100generalbiomarkers)
Gene Ontology analysis, more plots and information, but inconclusive
library(clusterProfiler)
##
## clusterProfiler v3.10.1 For help: https://guangchuangyu.github.io/software/clusterProfiler
##
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, append,
## as.data.frame, basename, cbind, colMeans, colSums, colnames,
## dirname, do.call, duplicated, eval, evalq, get, grep, grepl,
## intersect, is.unsorted, lapply, lengths, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, rank, rbind,
## rowMeans, rowSums, rownames, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
##
Enrichment with “enrichPathway”:
library(ReactomePA)
## ReactomePA v1.26.0 For help: https://guangchuangyu.github.io/ReactomePA
##
## If you use ReactomePA in published research, please cite:
## Guangchuang Yu, Qing-Yu He. ReactomePA: an R/Bioconductor package for reactome pathway analysis and visualization. Molecular BioSystems 2016, 12(2):477-479
# biomarkers
gene.df <- bitr(generalbiomarkergenes, fromType = "SYMBOL",
toType = c("ENSEMBL", "ENTREZID"),
OrgDb = org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(generalbiomarkergenes, fromType = "SYMBOL", toType =
## c("ENSEMBL", : 10% of input gene IDs are fail to map...
x <- enrichPathway(gene=gene.df$ENTREZID, pvalueCutoff = 0.05, readable = T )
head(as.data.frame(x))
## ID
## R-HSA-2559586 R-HSA-2559586
## R-HSA-2559583 R-HSA-2559583
## R-HSA-2262752 R-HSA-2262752
## R-HSA-2559582 R-HSA-2559582
## R-HSA-2559584 R-HSA-2559584
## R-HSA-8852276 R-HSA-8852276
## Description
## R-HSA-2559586 DNA Damage/Telomere Stress Induced Senescence
## R-HSA-2559583 Cellular Senescence
## R-HSA-2262752 Cellular responses to stress
## R-HSA-2559582 Senescence-Associated Secretory Phenotype (SASP)
## R-HSA-2559584 Formation of Senescence-Associated Heterochromatin Foci (SAHF)
## R-HSA-8852276 The role of GTSE1 in G2/M progression after G2 checkpoint
## GeneRatio BgRatio pvalue p.adjust qvalue
## R-HSA-2559586 10/60 80/10554 1.948831e-11 9.140019e-09 6.482428e-09
## R-HSA-2559583 12/60 195/10554 7.268531e-10 1.704471e-07 1.208872e-07
## R-HSA-2262752 16/60 426/10554 1.096103e-09 1.713575e-07 1.215328e-07
## R-HSA-2559582 8/60 110/10554 1.757728e-07 2.060936e-05 1.461689e-05
## R-HSA-2559584 4/60 16/10554 1.632265e-06 1.531064e-04 1.085886e-04
## R-HSA-8852276 6/60 75/10554 3.883378e-06 3.035507e-04 2.152890e-04
## geneID
## R-HSA-2559586 HIST1H1C/HMGA2/HIST1H2BD/LMNB1/HIST2H2BE/HIST1H4H/CCNA1/TP53/HIST1H2AE/CDKN1A
## R-HSA-2559583 HIST1H1C/HMGA2/HIST1H2BD/LMNB1/HIST2H2BE/FOS/NFKB1/HIST1H4H/CCNA1/TP53/HIST1H2AE/CDKN1A
## R-HSA-2262752 HIST1H1C/TUBB2B/HMGA2/HIST1H2BD/LMNB1/HIST2H2BE/FOS/NFKB1/PSMB10/CITED2/HIST1H4H/CCNA1/TP53/TUBB4A/HIST1H2AE/CDKN1A
## R-HSA-2559582 HIST1H2BD/HIST2H2BE/FOS/NFKB1/HIST1H4H/CCNA1/HIST1H2AE/CDKN1A
## R-HSA-2559584 HIST1H1C/HMGA2/LMNB1/TP53
## R-HSA-8852276 TUBB2B/PSMB10/GTSE1/TP53/TUBB4A/CDKN1A
## Count
## R-HSA-2559586 10
## R-HSA-2559583 12
## R-HSA-2262752 16
## R-HSA-2559582 8
## R-HSA-2559584 4
## R-HSA-8852276 6
# visualization
barplot(x,showCategory=12)
dotplot(x, showCategory=12)
cnetplot(x,categorySize="geneNum")
emapplot(x)
library(KEGG.db)
##
## KEGG.db contains mappings based on older data because the original
## resource was removed from the the public domain before the most
## recent update was produced. This package should now be
## considered deprecated and future versions of Bioconductor may
## not have it available. Users who want more current data are
## encouraged to look at the KEGGREST or reactome.db packages
library(org.Hs.eg.db)
library(enrichplot)
gene.df <- bitr(generalbiomarkergenes, fromType = "SYMBOL",
toType = c("ENSEMBL", "ENTREZID"),
OrgDb = org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(generalbiomarkergenes, fromType = "SYMBOL", toType =
## c("ENSEMBL", : 10% of input gene IDs are fail to map...
kk <- enrichKEGG(gene=gene.df$ENTREZID,pvalueCutoff = 0.05,pAdjustMethod = "none")
head(summary(kk))
## Warning in summary(kk): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## ID Description GeneRatio
## hsa05202 hsa05202 Transcriptional misregulation in cancer 8/49
## hsa04210 hsa04210 Apoptosis 6/49
## hsa05203 hsa05203 Viral carcinogenesis 7/49
## hsa05166 hsa05166 Human T-cell leukemia virus 1 infection 7/49
## hsa05161 hsa05161 Hepatitis B 6/49
## hsa04115 hsa04115 p53 signaling pathway 4/49
## BgRatio pvalue p.adjust qvalue
## hsa05202 186/7867 1.652177e-05 1.652177e-05 0.002191308
## hsa04210 136/7867 1.814709e-04 1.814709e-04 0.009781510
## hsa05203 201/7867 2.212484e-04 2.212484e-04 0.009781510
## hsa05166 219/7867 3.737403e-04 3.737403e-04 0.012392443
## hsa05161 163/7867 4.827212e-04 4.827212e-04 0.012804814
## hsa04115 72/7867 1.001411e-03 1.001411e-03 0.018012898
## geneID Count
## hsa05202 8091/4790/330/4291/8900/7157/1026/4804 8
## hsa04210 4001/2353/4790/330/7157/637 6
## hsa05203 3017/8349/4790/8365/8900/7157/1026 7
## hsa05166 8061/2353/4790/8829/8900/7157/1026 7
## hsa05161 2353/4790/8900/7157/1026/637 6
## hsa04115 51512/7157/1026/637 4
# visualization
# FC
library(enrichplot)
library(DOSE)
## Warning: package 'DOSE' was built under R version 3.5.2
## DOSE v3.8.2 For help: https://guangchuangyu.github.io/DOSE
##
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
top100generalbiomarkers=as.data.frame(top100generalbiomarkers)
colnames(top100generalbiomarkers)[1] <- "fold Change"
top100generalbiomarkers=cbind(top100generalbiomarkers,generalbiomarkergenes)
colnames(top100generalbiomarkers)[2] <- "genes"
FC <- top100generalbiomarkers$`fold Change`
names(FC) <- gene.df$ENTREZID
barplot(kk,showCategory=100)
dotplot(kk, showCategory=100)
cnetplot(kk,categorySize="geneNum",foldChange = FC)
cnetplot(kk,categorySize="geneNum",circular=TRUE)
emapplot(kk)
heatplot(kk,foldChange = FC)
upsetplot(kk)
# path
library(pathview)
## Warning: package 'pathview' was built under R version 3.5.2
## ##############################################################################
## Pathview is an open source software package distributed under GNU General
## Public License version 3 (GPLv3). Details of GPLv3 is available at
## http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to
## formally cite the original Pathview paper (not just mention it) in publications
## or products. For details, do citation("pathview") within R.
##
## The pathview downloads and uses KEGG data. Non-academic uses may require a KEGG
## license agreement (details at http://www.kegg.jp/kegg/legal.html).
## ##############################################################################
pathview(gene.data = FC,
pathway.id = "hsa05202",
species = "hsa",
limit = list(gene=5, cpd=1))
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory /Users/laura.plutowski/Desktop/Uni/4.Semester/projekt/project-02-group-05/Pathways
## Info: Writing image file hsa05202.pathview.png